# Author    : CoffeeChicken
# Date      : 2022/1/1 14:37
# Function  : Demo — crawl the remaining hospital data (other provinces/grades)


import requests
import json
from bs4 import BeautifulSoup
import time

# Browser-like User-Agent so the site serves the scraper like a normal client.
headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/85.0.4183.121 Safari/537.36"
}

# Detail API endpoint for a single hospital; format with a hospital id.
hospital1 = "https://y.dxy.cn/papi/hospital/detailInfo?hospital_id={}"

# Sample list-page URLs kept for reference; the crawl loop below builds its
# own list URL (with grade=6) instead of using these.
# NOTE(review): hospital2/hospital3 appear unused by the active code —
# confirm before removing.
hospital2 = "https://y.dxy.cn/hospitalList?location=440000&page=1&grade=2"

hospital3 = "https://y.dxy.cn/hospitalList?grade=2"

# response = requests.get(hospital3)
# print(response.text)

# soup = BeautifulSoup(response.text, 'html.parser')
# li_list = soup.select("#area > li")

# Province administrative-division code -> province name (Chinese).
# Used both to build list URLs (code) and to label the output (name).
# NOTE(review): several values carry stray spaces ('北京 ', ' 安徽'); they are
# written to the output verbatim — confirm whether they should be stripped.
province_dict = {'110000': '北京 ', '310000': '上海 ', '440000': '广东 ', '330000': '浙江 ', '320000': '江苏 ', '120000': '天津',
                 '130000': '河北', '140000': '山西', '150000': '内蒙古', '210000': '辽宁', '220000': '吉林', '230000': '黑龙江',
                 '340000': ' 安徽', '350000': '福建', '360000': '江西', '370000': '山东', '410000': '河南', '420000': '湖北',
                 '430000': '湖南', '450000': '广西', '460000': '海南', '500000': '重庆', '510000': '四川', '520000': '贵州',
                 '530000': '云南', '540000': '西藏', '610000': '陕西', '620000': '甘肃', '630000': '青海', '640000': '宁夏',
                 '650000': '新疆', '710000': '台湾', '810000': '香港', '820000': '澳门', '900000': '国外'}

# 获取全部省代码和名称
# for li in li_list:
#     province_dict[li["all_hospital-value"]] = li.text
#
# print(province_dict)

# Crawl every province: walk the first 5 list pages per province, collect the
# hospital ids from each page, then fetch each hospital's detail JSON.
result_list = []

for province_code in province_dict:
    hospital_json_list = []
    for page in range(1, 6):
        # grade=6 selects the hospital tier this script scrapes.
        list_url = "https://y.dxy.cn/hospitalList?grade=6&page={}&location={}".format(page, province_code)
        print(list_url)
        # BUG FIX: send the browser User-Agent on the list request too (the
        # original omitted it here) and bound the request with a timeout so a
        # hung connection cannot stall the whole crawl.
        list_response = requests.get(list_url, headers=headers, timeout=10)
        soup = BeautifulSoup(list_response.text, 'html.parser')
        anchors = soup.select(".hospital-title > a")
        # The hospital id is the last path segment of each detail link.
        hospital_ids = [a['href'].split('/')[-1] for a in anchors]

        # Fetch the detail JSON for every hospital found on this page.
        for hospital_id in hospital_ids:
            detail_url = hospital1.format(hospital_id)
            # BUG FIX: the original called requests.get(url, headers), which
            # passes the dict as the positional `params` argument — the
            # User-Agent header was never sent.  Use the keyword form.
            detail_response = requests.get(detail_url, headers=headers, timeout=10)
            try:
                hospital_json = json.loads(detail_response.text)
                temp = hospital_json['results']
                # Drop the bulky sub-objects this export does not need.
                del temp['info']
                del temp['leaders']
            except (ValueError, KeyError):
                # Non-JSON response or unexpected schema: skip this hospital.
                # (ValueError covers json.JSONDecodeError.)
                continue
            hospital_json_list.append(temp)
            print(temp)

    result_list.append({
        'province': province_dict[province_code],
        'info': hospital_json_list,
    })

    # Debug peek at the first record, guarded instead of try/except IndexError.
    if hospital_json_list:
        print(hospital_json_list[0])
    # BUG FIX: always pause between provinces — the original `continue`d past
    # the sleep whenever a province returned no hospitals, removing the
    # rate-limit exactly when the server was already refusing us.
    time.sleep(1)


# BUG FIX: the original serialized with str(result_list).replace("'", '"'),
# which produces invalid JSON whenever any value contains a quote/apostrophe
# and writes Python literals (None/True) instead of JSON ones.  Use the json
# module directly; ensure_ascii=False keeps the Chinese text readable.
with open('er_yi.json', 'w', encoding='utf-8') as f:
    json.dump(result_list, f, ensure_ascii=False)
